MACS 30500
University of Chicago
Grammar: the whole system and structure of a language or of languages in general, usually taken as consisting of syntax and morphology (including inflections) and sometimes also phonology and semantics.
# Display the mpg data set; the `##` lines that follow are its printed output.
mpg
## # A tibble: 234 × 11
## manufacturer model displ year cyl trans drv cty hwy
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29
## 3 audi a4 2.0 2008 4 manual(m6) f 20 31
## 4 audi a4 2.0 2008 4 auto(av) f 21 30
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26
## 7 audi a4 3.1 2008 6 auto(av) f 18 27
## 8 audi a4 quattro 1.8 1999 4 manual(m5) 4 18 26
## 9 audi a4 quattro 1.8 1999 4 auto(l5) 4 16 25
## 10 audi a4 quattro 2.0 2008 4 manual(m6) 4 20 28
## # ... with 224 more rows, and 2 more variables: fl <chr>, class <chr>
# NOTE(review): the printed output below contains only the displ and hwy
# columns; the code that selected them is not shown here -- confirm.
mpg
## # A tibble: 234 × 2
## displ hwy
## <dbl> <int>
## 1 1.8 29
## 2 1.8 29
## 3 2.0 31
## 4 2.0 30
## 5 2.8 26
## 6 2.8 26
## 7 3.1 27
## 8 1.8 26
## 9 1.8 25
## 10 2.0 28
## # ... with 224 more rows
# NOTE(review): the printed output below has two columns named x and y; the
# code that produced this renaming is not shown here -- confirm.
mpg
## # A tibble: 234 × 2
## x y
## <dbl> <int>
## 1 1.8 29
## 2 1.8 29
## 3 2.0 31
## 4 2.0 30
## 5 2.8 26
## 6 2.8 26
## 7 3.1 27
## 8 1.8 26
## 9 1.8 25
## 10 2.0 28
## # ... with 224 more rows
## # A tibble: 234 × 1
## cyl
## <int>
## 1 4
## 2 4
## 3 4
## 4 4
## 5 6
## 6 6
## 7 6
## 8 4
## 9 4
## 10 4
## # ... with 224 more rows
## # A tibble: 4 × 2
## cyl n
## <int> <int>
## 1 4 81
## 2 5 4
## 3 6 79
## 4 8 70
# A scatterplot of hwy against displ from mpg, written in progressively
# shorter forms. Each statement below stands on its own.

# Fully explicit form: one layer (data, mapping, geom, stat, position) plus
# explicit x/y scales and a Cartesian coordinate system.
ggplot() +
  layer(
    data = mpg, mapping = aes(x = displ, y = hwy),
    geom = "point", stat = "identity", position = "identity"
  ) +
  scale_x_continuous() +
  scale_y_continuous() +
  coord_cartesian()

# Fully explicit form (repeated).
ggplot() +
  layer(
    data = mpg, mapping = aes(x = displ, y = hwy),
    geom = "point", stat = "identity", position = "identity"
  ) +
  scale_x_continuous() +
  scale_y_continuous() +
  coord_cartesian()

# Layer only: stat, position, scales and coordinate system are left out.
# NOTE(review): layer() may require an explicit stat and position in current
# ggplot2 releases -- confirm this shortened form still runs.
ggplot() +
  layer(
    data = mpg, mapping = aes(x = displ, y = hwy),
    geom = "point"
  )

# Fully explicit form (repeated).
ggplot() +
  layer(
    data = mpg, mapping = aes(x = displ, y = hwy),
    geom = "point", stat = "identity", position = "identity"
  ) +
  scale_x_continuous() +
  scale_y_continuous() +
  coord_cartesian()

# Layer only (repeated).
ggplot() +
  layer(
    data = mpg, mapping = aes(x = displ, y = hwy),
    geom = "point"
  )

# Data and mapping moved into ggplot(); the layer is written as geom_point().
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()

# Fully explicit form (repeated).
ggplot() +
  layer(
    data = mpg, mapping = aes(x = displ, y = hwy),
    geom = "point", stat = "identity", position = "identity"
  ) +
  scale_x_continuous() +
  scale_y_continuous() +
  coord_cartesian()

# Layer only (repeated).
ggplot() +
  layer(
    data = mpg, mapping = aes(x = displ, y = hwy),
    geom = "point"
  )

# ggplot() + geom_point() with named arguments (repeated).
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()

# Shortest form: arguments passed by position.
ggplot(mpg, aes(displ, hwy)) +
  geom_point()

# Shortest form (repeated).
ggplot(mpg, aes(displ, hwy)) +
  geom_point()

# Same plot with a second layer added on top of the points.
ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess'
# Read the two Minard data files: troop records and city locations.
troops <- read_table(file = "data/minard-troops.txt")
cities <- read_table(file = "data/minard-cities.txt")

# Show the troop data; the `##` lines that follow are its printed output.
troops
## # A tibble: 51 × 5
## long lat survivors direction group
## <dbl> <dbl> <int> <chr> <int>
## 1 24.0 54.9 340000 A 1
## 2 24.5 55.0 340000 A 1
## 3 25.5 54.5 340000 A 1
## 4 26.0 54.7 320000 A 1
## 5 27.0 54.8 300000 A 1
## 6 28.0 54.9 280000 A 1
## 7 28.5 55.0 240000 A 1
## 8 29.0 55.1 210000 A 1
## 9 30.0 55.2 180000 A 1
## 10 30.3 55.3 175000 A 1
## # ... with 41 more rows
# Show the city data; the `##` lines that follow are its printed output.
cities
## # A tibble: 20 × 3
## long lat city
## <dbl> <dbl> <chr>
## 1 24.0 55.0 Kowno
## 2 25.3 54.7 Wilna
## 3 26.4 54.4 Smorgoni
## 4 26.8 54.3 Moiodexno
## 5 27.7 55.2 Gloubokoe
## 6 27.6 53.9 Minsk
## 7 28.5 54.3 Studienska
## 8 28.7 55.5 Polotzk
## 9 29.2 54.4 Bobr
## 10 30.2 55.3 Witebsk
## 11 30.4 54.5 Orscha
## 12 30.4 53.9 Mohilow
## 13 32.0 54.8 Smolensk
## 14 33.2 54.9 Dorogobouge
## 15 34.3 55.2 Wixma
## 16 34.4 55.5 Chjat
## 17 36.0 55.5 Mojaisk
## 18 37.6 55.8 Moscou
## 19 36.6 55.3 Tarantino
## 20 36.5 55.0 Malo-Jarosewii
# Build the Minard map step by step. Each stage is stored under its own name
# and then displayed.

# Stage 1: troop paths, with size mapped to survivors, colour to direction,
# and one path drawn per group.
plot_troops <- ggplot(troops, aes(long, lat)) +
  geom_path(aes(
    size = survivors,
    color = direction,
    group = group
  ))
plot_troops

# Stage 2: add city names as text, taken from the separate cities data.
plot_both <- plot_troops +
  geom_text(data = cities, aes(label = city), size = 4)
plot_both

# Stage 3: set the size and colour scales, use a map projection, add a title
# and remove the axis labels.
plot_polished <- plot_both +
  scale_size(
    range = c(0, 12),
    breaks = c(10000, 20000, 30000),
    labels = c("10,000", "20,000", "30,000")
  ) +
  scale_color_manual(values = c("tan", "grey50")) +
  coord_map() +
  labs(
    title = "Map of Napoleon's Russian campaign of 1812",
    x = NULL,
    y = NULL
  )
plot_polished

# Stage 4: apply theme_void() and hide the legend.
plot_polished +
  theme_void() +
  theme(legend.position = "none")

library(ggplot2)
library(tibble)
# install.packages("gapminder")
library(gapminder)
data("gapminder")
# Show the gapminder data; the `##` lines that follow are its printed output.
gapminder
## # A tibble: 1,704 × 6
## country continent year lifeExp pop gdpPercap
## <fctr> <fctr> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.801 8425333 779.4453
## 2 Afghanistan Asia 1957 30.332 9240934 820.8530
## 3 Afghanistan Asia 1962 31.997 10267083 853.1007
## 4 Afghanistan Asia 1967 34.020 11537966 836.1971
## 5 Afghanistan Asia 1972 36.088 13079460 739.9811
## 6 Afghanistan Asia 1977 38.438 14880372 786.1134
## 7 Afghanistan Asia 1982 39.854 12881816 978.0114
## 8 Afghanistan Asia 1987 40.822 13867957 852.3959
## 9 Afghanistan Asia 1992 41.674 16317921 649.3414
## 10 Afghanistan Asia 1997 41.763 22227415 635.3414
## # ... with 1,694 more rows
gdpPercap?
What is the average life expectancy, per continent?
# Box plot of lifeExp for each continent.
ggplot(gapminder, aes(continent, lifeExp)) +
  geom_boxplot()
What is the relationship between GDP and life expectancy?
# Scatterplot of lifeExp against gdpPercap with a smoothing layer on top.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam'
How is the relationship moderated by continent?
# Same scatterplot and smoother, with colour mapped to continent.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess'
# Coloured scatterplot and smoother, split into one panel per continent.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point() +
  geom_smooth() +
  facet_wrap(~continent)
## `geom_smooth()` using method = 'loess'
Bonus: what is causing the outlier in gdpPercap?
# Scatterplot and smoother with every point labelled by its country.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  geom_smooth() +
  geom_text(mapping = aes(label = country))
## `geom_smooth()` using method = 'gam'